# genetic risk score

setwd("C:/Users/sijia/Desktop/current working dictionary/05.发表论文/08.代谢组/03.data and results/data/genetic_score")
# Genetic score data
data <- read.csv("0327_genetic_score_4593.csv")
dim(data)
# [1] 4593 1240
hist(data$HMGCR_LM)
hist(data$ACLY_LM)

# Rank-based inverse normal transformation: rank / (n + 1) mapped through the
# standard normal quantile function.
# Missing values stay NA and are excluded from n. With rank()'s default
# (na.last = TRUE) they would be ranked highest and receive spurious large
# positive scores. For complete data the result is unchanged.
rank_int <- function(x) {
  x <- as.numeric(x)
  n_obs <- sum(!is.na(x))
  qnorm(rank(x, na.last = "keep") / (n_obs + 1), mean = 0, sd = 1)
}

data[["HMGCR_n"]] <- rank_int(data[, "HMGCR_LM"])
data[["ACLY_n"]] <- rank_int(data[, "ACLY_LM"])
hist(data$HMGCR_n)
hist(data$ACLY_n)

# Row names are written as an extra first column (write.csv default).
write.csv(data, "0610_genetic_score_LM.csv")

# The commands in this section are Stata, not R: they are meant to be run in
# Stata on the CSV written above and will not parse as R code.
# using stata
import delimited "C:\Users\sijia\Desktop\current working dictionary\05.发表论文\08.代谢组\03.data and results\data\genetic_score\0610_genetic_score_LM.csv"
# (1243 vars, 4593 obs)

# Split each transformed score into two groups (egen cut() with group(2);
# the comparisons below use the resulting codes 0 and 1).
egen ACLY_2g=cut(acly_n), group(2) label
egen HMGCR_2g=cut(hmgcr_n), group(2) label
# Combine the ACLY score group with healthy_score2 into one 4-level variable:
#   0 = score group 1 and healthy_score2 <= 2
#   1 = score group 0 and healthy_score2 <= 2
#   2 = score group 1 and healthy_score2 >  2
#   3 = score group 0 and healthy_score2 >  2
# NOTE(review): Stata treats missing numeric values as larger than any number,
# so a missing healthy_score2 would satisfy ">2" -- confirm there are none.
gen acly_hls_4g=0 if ACLY_2g==1 & healthy_score2<=2
replace acly_hls_4g=1 if ACLY_2g==0 & healthy_score2<=2
replace acly_hls_4g=2 if ACLY_2g==1 & healthy_score2>2
replace acly_hls_4g=3 if ACLY_2g==0 & healthy_score2>2

# Same 4-level coding for the HMGCR score group.
gen hmgcr_hls_4g=0 if HMGCR_2g==1 & healthy_score2<=2
replace hmgcr_hls_4g=1 if HMGCR_2g==0 & healthy_score2<=2
replace hmgcr_hls_4g=2 if HMGCR_2g==1 & healthy_score2>2
replace hmgcr_hls_4g=3 if HMGCR_2g==0 & healthy_score2>2

# Back in R. 0610_genetic_score_LM_4g.csv is read below; presumably it is the
# Stata data set exported after the 4-group variables were created -- the
# export step is not part of this script (TODO confirm).
rm(list = ls())
data <- read.csv("0327_genetic_score_4593.csv")
fourgs <- read.csv("0610_genetic_score_LM_4g.csv")

# The 4-group columns are copied by row position, so both files must contain
# the same participants in the same order. Stop instead of silently attaching
# groups to the wrong participants.
stopifnot(
  "studyid" %in% names(data),
  all(c("studyid", "hmgcr_hls_4g", "acly_hls_4g") %in% names(fourgs)),
  nrow(data) == nrow(fourgs)
)
sum(data$studyid != fourgs$studyid) # number of misaligned rows; expected 0
# all() returns NA when a studyid is missing, which also makes stopifnot() fail.
stopifnot(all(data$studyid == fourgs$studyid))

data$hmgcr_hls_4g <- fourgs$hmgcr_hls_4g
data$acly_hls_4g <- fourgs$acly_hls_4g

# Print the column names of each 225-column block to check the layout of `data`.
colnames(data)[20:244] # original metabolites (author's note also lists studyid, status)
colnames(data)[245:469] # log-transformed metabolites
colnames(data)[470:694] # log/SD-transformed metabolites
colnames(data)[695:919] # INT-transformed metabolites

# Metabolite names used below as row names of the result matrices, so the
# vector must hold exactly 225 names.
# NOTE(review): the models below pick outcomes by column position (695:919);
# the order of these names is assumed to match that column order -- confirm.
metabolity_name<-c("nmr_xxl_vldl_p","nmr_xxl_vldl_l","nmr_xxl_vldl_pl","nmr_xxl_vldl_c","nmr_xxl_vldl_ce","nmr_xxl_vldl_fc","nmr_xxl_vldl_tg","nmr_xl_vldl_p","nmr_xl_vldl_l","nmr_xl_vldl_pl","nmr_xl_vldl_c","nmr_xl_vldl_ce","nmr_xl_vldl_fc","nmr_xl_vldl_tg","nmr_l_vldl_p","nmr_l_vldl_l","nmr_l_vldl_pl","nmr_l_vldl_c","nmr_l_vldl_ce","nmr_l_vldl_fc","nmr_l_vldl_tg","nmr_m_vldl_p","nmr_m_vldl_l","nmr_m_vldl_pl","nmr_m_vldl_c","nmr_m_vldl_ce","nmr_m_vldl_fc","nmr_m_vldl_tg","nmr_s_vldl_p","nmr_s_vldl_l","nmr_s_vldl_pl","nmr_s_vldl_c","nmr_s_vldl_ce","nmr_s_vldl_fc","nmr_s_vldl_tg","nmr_xs_vldl_p","nmr_xs_vldl_l","nmr_xs_vldl_pl","nmr_xs_vldl_c","nmr_xs_vldl_ce","nmr_xs_vldl_fc","nmr_xs_vldl_tg","nmr_idl_p","nmr_idl_l","nmr_idl_pl","nmr_idl_c","nmr_idl_ce","nmr_idl_fc","nmr_idl_tg","nmr_l_ldl_p","nmr_l_ldl_l","nmr_l_ldl_pl","nmr_l_ldl_c","nmr_l_ldl_ce","nmr_l_ldl_fc","nmr_l_ldl_tg","nmr_m_ldl_p","nmr_m_ldl_l","nmr_m_ldl_pl","nmr_m_ldl_c","nmr_m_ldl_ce","nmr_m_ldl_fc","nmr_m_ldl_tg","nmr_s_ldl_p","nmr_s_ldl_l","nmr_s_ldl_pl","nmr_s_ldl_c","nmr_s_ldl_ce","nmr_s_ldl_fc","nmr_s_ldl_tg","nmr_xl_hdl_p","nmr_xl_hdl_l","nmr_xl_hdl_pl","nmr_xl_hdl_c","nmr_xl_hdl_ce","nmr_xl_hdl_fc","nmr_xl_hdl_tg","nmr_l_hdl_p","nmr_l_hdl_l","nmr_l_hdl_pl","nmr_l_hdl_c","nmr_l_hdl_ce","nmr_l_hdl_fc","nmr_l_hdl_tg","nmr_m_hdl_p","nmr_m_hdl_l","nmr_m_hdl_pl","nmr_m_hdl_c","nmr_m_hdl_ce","nmr_m_hdl_fc","nmr_m_hdl_tg","nmr_s_hdl_p","nmr_s_hdl_l","nmr_s_hdl_pl","nmr_s_hdl_c","nmr_s_hdl_ce","nmr_s_hdl_fc","nmr_s_hdl_tg","nmr_xxl_vldl_pl_per","nmr_xxl_vldl_c_per","nmr_xxl_vldl_ce_per","nmr_xxl_vldl_fc_per","nmr_xxl_vldl_tg_per","nmr_xl_vldl_pl_per","nmr_xl_vldl_c_per","nmr_xl_vldl_ce_per","nmr_xl_vldl_fc_per","nmr_xl_vldl_tg_per","nmr_l_vldl_pl_per","nmr_l_vldl_c_per","nmr_l_vldl_ce_per","nmr_l_vldl_fc_per","nmr_l_vldl_tg_per","nmr_m_vldl_pl_per","nmr_m_vldl_c_per","nmr_m_vldl_ce_per","nmr_m_vldl_fc_per","nmr_m_vldl_tg_per","nmr_s_vldl_pl_per","nmr_s_vldl_c_per","nmr_s_vldl_ce_per","nmr_s_vldl_fc_per",
"nmr_s_vldl_tg_per","nmr_xs_vldl_pl_per","nmr_xs_vldl_c_per","nmr_xs_vldl_ce_per","nmr_xs_vldl_fc_per","nmr_xs_vldl_tg_per","nmr_idl_pl_per","nmr_idl_c_per","nmr_idl_ce_per","nmr_idl_fc_per","nmr_idl_tg_per","nmr_l_ldl_pl_per","nmr_l_ldl_c_per","nmr_l_ldl_ce_per","nmr_l_ldl_fc_per","nmr_l_ldl_tg_per","nmr_m_ldl_pl_per","nmr_m_ldl_c_per","nmr_m_ldl_ce_per","nmr_m_ldl_fc_per","nmr_m_ldl_tg_per","nmr_s_ldl_pl_per","nmr_s_ldl_c_per","nmr_s_ldl_ce_per","nmr_s_ldl_fc_per","nmr_s_ldl_tg_per","nmr_xl_hdl_pl_per","nmr_xl_hdl_c_per","nmr_xl_hdl_ce_per","nmr_xl_hdl_fc_per","nmr_xl_hdl_tg_per","nmr_l_hdl_pl_per","nmr_l_hdl_c_per","nmr_l_hdl_ce_per","nmr_l_hdl_fc_per","nmr_l_hdl_tg_per","nmr_m_hdl_pl_per","nmr_m_hdl_c_per","nmr_m_hdl_ce_per","nmr_m_hdl_fc_per","nmr_m_hdl_tg_per","nmr_s_hdl_pl_per","nmr_s_hdl_c_per","nmr_s_hdl_ce_per","nmr_s_hdl_fc_per","nmr_s_hdl_tg_per","nmr_vldl_d","nmr_ldl_d","nmr_hdl_d","nmr_serum_c","nmr_vldl_c","nmr_remnant_c","nmr_ldl_c","nmr_hdl_c","nmr_hdl2_c","nmr_hdl3_c","nmr_estc","nmr_freec","nmr_serum_tg","nmr_vldl_tg","nmr_ldl_tg","nmr_hdl_tg","nmr_totpg","nmr_tg_pg","nmr_pc","nmr_sm","nmr_totcho","nmr_apoa1","nmr_apob","nmr_apob_apoa1","nmr_totfa","nmr_unsat","nmr_dha","nmr_la","nmr_faw3","nmr_faw6","nmr_pufa","nmr_mufa","nmr_sfa","nmr_dha_fa","nmr_la_fa","nmr_faw3_fa","nmr_faw6_fa","nmr_pufa_fa","nmr_mufa_fa","nmr_sfa_fa","nmr_glc","nmr_lac","nmr_cit","nmr_ala","nmr_gln","nmr_his","nmr_ile","nmr_leu","nmr_val","nmr_phe","nmr_tyr","nmr_ace","nmr_acace","nmr_bohbut","nmr_crea","nmr_alb","nmr_gp")
# Show how read.csv typed the covariates and the HMGCR exposure variable.
class(data[["status_update"]])
class(data[["region_code"]])
class(data[["education"]])
class(data[["fasting_time"]])
class(data[["hmgcr_hls_4g"]])

# Turn the categorical columns into factors so lm() dummy-codes them.
# NOTE(review): status_update is described further down as a 4-group variable
# but is not converted here -- confirm it is not stored as a numeric code.
data[c("region_code", "education", "hmgcr_hls_4g", "acly_hls_4g")] <- lapply(
  data[c("region_code", "education", "hmgcr_hls_4g", "acly_hls_4g")],
  factor
)

# Result tables, one row per metabolite. Each table stores six statistics
# (estimate, se, zvalue, pr, lci, uci) for each of three model coefficients,
# giving columns estimate_1 ... uci_1, estimate_2 ... uci_3.
HMGCR <- matrix(
  NA,
  nrow = 225,
  ncol = 18,
  dimnames = list(
    metabolity_name,
    paste0(
      rep(c("estimate", "se", "zvalue", "pr", "lci", "uci"), times = 3),
      "_",
      rep(1:3, each = 6)
    )
  )
)
# The ACLY table has the same empty layout.
ACLY <- HMGCR

# One linear model per metabolite outcome (columns 695:919 of `data`) with the
# HMGCR 4-group variable as exposure.
# adjustment: age (continuous), sex, fasting time (dichotomous), education level (3 groups), region (10 regions), and case/control status (4 groups), pc(1-10).
# Each row of HMGCR receives, for coefficient rows 2-4 of the model (the
# exposure terms, because the exposure is the first predictor): estimate,
# standard error, test statistic (stored under the "zvalue" columns; for lm()
# this is the t value), p-value and the 95% confidence limits.
for (i in seq_len(nrow(HMGCR))) {
  model <- lm(
    data[, 695 + i - 1] ~ hmgcr_hls_4g + age_at_study_date + is_female +
      education + fasting_time + region_code + status_update +
      pc1 + pc2 + pc3 + pc4 + pc5 + pc6 + pc7 + pc8 + pc9 + pc10,
    data = data
  )
  # Summarise the model once instead of once per stored cell.
  coefs <- coef(summary(model))
  exposure_rows <- rownames(coefs)[2:4]
  # Guard the positional lookup: if the exposure has fewer than four usable
  # levels, rows 2-4 would silently pick up other covariates.
  stopifnot(all(startsWith(exposure_rows, "hmgcr_hls_4g")))
  # Request the intervals by term name so they always belong to the same
  # coefficients as the estimates.
  ci <- confint(model, parm = exposure_rows, level = 0.95)
  # Row-wise flattening gives estimate, se, statistic, p, lci, uci per term.
  HMGCR[i, ] <- as.vector(t(cbind(coefs[exposure_rows, 1:4], ci)))
  print(i)
}

write.csv(HMGCR, file = "0610_HMGCR_LM_HLs_4g.csv")

# One linear model per metabolite outcome (columns 695:919 of `data`) with the
# ACLY 4-group variable as exposure.
# adjustment: age (continuous), sex, fasting time (dichotomous), education level (3 groups), region (10 regions), and case/control status (4 groups), pc(1-10).
# Each row of ACLY receives, for coefficient rows 2-4 of the model (the
# exposure terms, because the exposure is the first predictor): estimate,
# standard error, test statistic (stored under the "zvalue" columns; for lm()
# this is the t value), p-value and the 95% confidence limits.
for (i in seq_len(nrow(ACLY))) {
  model <- lm(
    data[, 695 + i - 1] ~ acly_hls_4g + age_at_study_date + is_female +
      education + fasting_time + region_code + status_update +
      pc1 + pc2 + pc3 + pc4 + pc5 + pc6 + pc7 + pc8 + pc9 + pc10,
    data = data
  )
  # Summarise the model once instead of once per stored cell.
  coefs <- coef(summary(model))
  exposure_rows <- rownames(coefs)[2:4]
  # Guard the positional lookup: if the exposure has fewer than four usable
  # levels, rows 2-4 would silently pick up other covariates.
  stopifnot(all(startsWith(exposure_rows, "acly_hls_4g")))
  # Request the intervals by term name so they always belong to the same
  # coefficients as the estimates.
  ci <- confint(model, parm = exposure_rows, level = 0.95)
  # Row-wise flattening gives estimate, se, statistic, p, lci, uci per term.
  ACLY[i, ] <- as.vector(t(cbind(coefs[exposure_rows, 1:4], ci)))
  print(i)
}

write.csv(ACLY, file = "0610_ACLY_LM_HLs_4g.csv")